package com.helwen.pdf;

import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.jupiter.api.Test;

import java.io.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * @author: heqiwen
 * @date : 2021/7/12 16:36
 */
public class CopyPdf {

    @Test
    public void copyFile() throws Exception {
        File pdf = null;
        BufferedInputStream buf = null;
        FileOutputStream out = null;
        try {
           /* pdf = new File("C:\\develop\\docs\\枝江\\居民健康档案\\第 17 部分 门诊摘要.pdf");
            //resp.setContentLength((int)pdf.length());
            FileInputStream input = new FileInputStream(pdf);
            buf = new BufferedInputStream(input);
            int readBytes = 0;
            File word = new File("C:\\develop\\docs\\枝江\\公卫2\\template_demo.docx");
            out = new FileOutputStream(word);
            BufferedWriter bw=new BufferedWriter(new OutputStreamWriter(out,"utf-8"));
            while ((readBytes = buf.read()) != -1) {
                bw.write(readBytes);

            }
            bw.close();*/
            String regEx="[`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~！@#￥%……&*（）——+|{}【】‘；：”“’。，、？]";
            Pattern p = Pattern.compile(regEx);
            Matcher m = p.matcher("门(急)诊检验报告单头（对应物理表名：ADI_LAREXA_INFO）");
            String s =m.replaceAll("").trim();
            System.out.println("s:"+s);

            String ftype=" var b3 ";
            ftype=ftype.trim();
            System.out.println(ftype);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (out != null) out.close();
            if (buf != null) buf.close();

        }
    }

    /**
     *
     * @Title: getTextFromPdf
     * @Description: 读取pdf文件内容
     * @param filePath
     * @return: 读出的pdf的内容
     */
    public static String getTextFromPdf(String filePath) {
        return null;
    }

    public static void main(String[] args) {
        try{
            String filePath="C:\\develop\\docs\\枝江\\居民健康档案\\第 17 部分 门诊摘要.pdf";
            PDDocument document=PDDocument.load(new FileInputStream(new File(filePath)));
            //获取一个PDFTextStripper文本剥离对象
            PDFTextStripper textStripper =new PDFTextStripper();
            Writer writer=new OutputStreamWriter(new FileOutputStream(new File("C:\\develop\\docs\\枝江\\居民健康档案\\demo.docx")),"utf-8");
            textStripper.writeText(document,writer);
            int pages=document.getNumberOfPages();
            // 设置按顺序输出
            textStripper.setSortByPosition(true);
            textStripper.setStartPage(1);
            textStripper.setEndPage(pages);
            String content=textStripper.getText(document);
            //vo.setContent(content);

            System.out.println("内容:"+content);
            System.out.println("全部页数"+document.getNumberOfPages());
            //关闭文档
            document.close();

        }catch(Exception e){

        }
    }
}
